Test

library(xts)  # Time series framework
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggplot2)  # Fancy plots
library(quantmod)  # Candle charts
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(plotly)  # Fancy plots 2.0
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Read the raw SPY file with every field as text (header = FALSE), so the
# first row -- presumably the column-label row; confirm against spy.txt --
# arrives as ordinary data and is dropped below.
dd <- read.table("spy.txt", header = FALSE, sep = "",
                 dec = ".", stringsAsFactors = FALSE)
# Convert column by column. lapply() keeps one list element per column, so the
# result is a 4-column data frame even when a single data row remains;
# apply() on a data frame goes through a matrix and collapses that case to a
# plain vector.
dd <- as.data.frame(lapply(dd[-1, ], as.numeric))
colnames(dd) <- c("Date", "Bin", "Volume", "Price")
head(dd, 30)
##        Date Bin   Volume    Price
## 1  20121228   1  5137187 132.6409
## 2  20121228   2  5952532 132.5185
## 3  20121228   3  7166682 132.3489
## 4  20121228   4  4548464 132.5185
## 5  20121228   5  8297762 132.8670
## 6  20121228   6  8680173 132.8576
## 7  20121228   7  4285683 132.7634
## 8  20121228   8  3148610 132.6692
## 9  20121228   9  2761214 132.7257
## 10 20121228  10  1849520 132.7493
## 11 20121228  11  2882498 132.8670
## 12 20121228  12  2269608 132.8764
## 13 20121228  13  1601405 132.7917
## 14 20121228  14  1264754 132.7352
## 15 20121228  15  1809688 132.7917
## 16 20121228  16  1104409 132.7541
## 17 20121228  17  1527122 132.8293
## 18 20121228  18  2553620 132.6409
## 19 20121228  19  2115685 132.5468
## 20 20121228  20  2009043 132.5186
## 21 20121228  21  2458923 132.5750
## 22 20121228  22  1865370 132.4902
## 23 20121228  23  2861623 132.4573
## 24 20121228  24  5284794 132.5656
## 25 20121228  25 13616607 132.1605
## 26 20121228  26 26433386 131.7555
## 27 20121231   1 11161827 132.0569
## 28 20121231   2 10344856 132.2830
## 29 20121231   3  7564023 132.1605
## 30 20121231   4  7756575 132.2547
# Sanity check: total volume over rows 1-26, which the listing above shows to
# be the 26 bins dated 20121228.
sum(dd$Volume[1:26])
## [1] 123486362
# Day: 2012-12-28
# Volume: 123,486,362  (sum of the 26 bins, as printed above)
# Price: 132.75
# Actual volume: 2,426,680,000
# Actual price: 1,402.43
# NOTE(review): the "actual" figures are roughly ten times the file's values;
# presumably they are S&P 500 index quotes rather than SPY quotes -- confirm.

# Same check for rows 27-52 (presumably the 26 bins of 20121231 -- only rows
# 27-30 are visible in the listing above).
sum(dd$Volume[27:52])
## [1] 218373347
# Day: 2012-12-31
# Volume: 218,373,347  (sum of rows 27-52, as printed above)
# Price: 132.2830
# Actual volume: 3,204,330,000
# Actual price: 1,426.19

expandDate <- function(s) {
  # Turn compact "yyyymmdd" stamps into "yyyy-mm-dd" strings.
  #   s: character (or numeric, coerced by substr) vector of stamps
  # Returns a character vector with one element per element of s.
  firstChar <- c(1L, 5L, 7L)  # where year, month and day start
  lastChar <- c(4L, 6L, 8L)   # where year, month and day end

  # One substr() call per field, each applied to the whole vector at once
  fields <- Map(function(from, to) substr(s, from, to), firstChar, lastChar)

  do.call(paste, c(fields, list(sep = "-")))
}

# expandDate() is built on substr()/paste(), which work on whole vectors, so
# it can be applied to the full column in one call.
dates <- expandDate(dd$Date)

nBins <- 26          # rows (bins) per trading day
startTrading <- 9.5  # first bin, in hours after midnight (09:30)
endTrading <- 16     # last bin, in hours after midnight (16:00)

# Keep only Volume and Price (columns 3 and 4) as a numeric matrix for xts
dd <- as.matrix(dd[, 3:4])

# The stamps below assume every day contributes exactly nBins rows; fail here
# with a clear message instead of inside xts() with a length mismatch.
nDays <- nrow(dd) / nBins
stopifnot("nrow(dd) must be a multiple of nBins" = nDays == round(nDays))

# Seconds after midnight of each bin: nBins equally spaced points running from
# startTrading to endTrading inclusive.
binStep <- 60*60*(endTrading - startTrading)/(nBins - 1)
binOffsets <- startTrading*60*60 + binStep*(seq_len(nBins) - 1)

# Midnight of every trading day, taken from the first row of each day, then
# expanded to one stamp per row. Built in one vectorised step rather than by
# appending to `intra` once per day.
dayStarts <- as.POSIXct(dates[(seq_len(nDays) - 1)*nBins + 1])
intra <- rep(dayStarts, each = nBins) + rep(binOffsets, times = length(dayStarts))

dd <- xts(dd, order.by = intra)
head(dd)
##                      Volume    Price
## 2012-12-28 09:30:00 5137187 132.6409
## 2012-12-28 09:45:36 5952532 132.5185
## 2012-12-28 10:01:12 7166682 132.3489
## 2012-12-28 10:16:48 4548464 132.5185
## 2012-12-28 10:32:24 8297762 132.8670
## 2012-12-28 10:48:00 8680173 132.8576
# Average traded volume per day: total volume divided by the number of days
# (nrow(dd) / nBins). Computed with a single vectorised sum, and without
# creating a variable called `sum` that would mask base::sum().
nDays <- nrow(dd)/nBins
totalVolume <- sum(as.numeric(dd$Volume))

meanVolume <- totalVolume/nDays
# 97,586,165  (Average daily volume)

getDailyVolumeBars <- function(prices, volume, delta = 3.7e6) {
  # Objective: generate volume-sampled candles.
  #
  # Arguments
  #   prices: single-column xts of prices; its index supplies the time stamps
  #   volume: traded volume per row, aligned with `prices`
  #   delta:  volume per candle (3.7e6 - standard value); one positive number
  #
  # Value: xts with columns Open, High, Low, Close, one row per candle.
  #   Time:  time stamp of the row on which the candle closed
  #   Open:  price on the row where the previous candle closed (row 1 for the
  #          first candle)
  #   High / Low: extremes over the rows from the open row to the close row
  #   Close: price on the row that pushed the accumulated volume above delta
  # A row whose volume spans several multiples of delta closes several
  # candles, which then share that row's time stamp. When the accumulated
  # volume never exceeds delta the result has zero rows.

  # delta <= 0 would never let the inner loop terminate
  stopifnot(is.numeric(delta), length(delta) == 1L, !is.na(delta), delta > 0)

  # Plain vectors are extracted once, so the loop does no xts subsetting
  px <- as.numeric(prices)
  vol <- as.numeric(volume)
  stamps <- index(prices)

  # Only the open/close row of each candle is recorded while scanning; the
  # OHLC matrix is assembled once afterwards instead of rbind()-ing per candle
  barFrom <- integer(0)
  barTo <- integer(0)
  nBars <- 0L

  volAcc <- 0
  lastReset <- 1L

  for (k in seq_along(px)) {
    volAcc <- volAcc + vol[k]
    while (volAcc > delta) {
      nBars <- nBars + 1L
      barFrom[nBars] <- lastReset
      barTo[nBars] <- k

      # The next candle opens on the row where this one closed
      lastReset <- k
      volAcc <- volAcc - delta
    }
  }

  # Apply `fun` (max or min) to the price range covered by each candle
  barExtreme <- function(fun) {
    vapply(seq_len(nBars),
           function(i) fun(px[barFrom[i]:barTo[i]]),
           numeric(1))
  }

  bars <- cbind(Open = px[barFrom],
                High = barExtreme(max),
                Low = barExtreme(min),
                Close = px[barTo])

  xts(bars, order.by = stamps[barTo])
}

# Volume bars with the default delta of 3.7e6, i.e. roughly the average daily
# volume divided by the 26 intraday bins (97,586,165 / 26 is about 3.75e6).
MRes <- getDailyVolumeBars(prices = dd$Price, volume = dd$Volume)
head(MRes)
##                         Open     High      Low    Close
## 2012-12-28 09:30:00 132.6409 132.6409 132.6409 132.6409
## 2012-12-28 09:45:36 132.6409 132.6409 132.5185 132.5185
## 2012-12-28 10:01:12 132.5185 132.5185 132.3489 132.3489
## 2012-12-28 10:01:12 132.3489 132.3489 132.3489 132.3489
## 2012-12-28 10:16:48 132.3489 132.5185 132.3489 132.5185
## 2012-12-28 10:16:48 132.5185 132.5185 132.5185 132.5185
# Candlestick chart of the first 30 volume bars, numbered in bar order.
# head() returns fewer rows when MRes is shorter, whereas `[1:30, ]` would pad
# the frame with all-NA rows; seq_len() also copes with a zero-row MRes.
dfAux <- head(data.frame(Bars = seq_len(nrow(MRes)), coredata(MRes)), 30)
fig <- dfAux %>% plot_ly(x = ~Bars, type = "candlestick",
                         open = ~Open, close = ~Close,
                         high = ~High, low = ~Low)
fig <- fig %>% layout(title = "Basic Candlestick Chart (Daily Volume / 26)")
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Log-returns between consecutive bar closes for the delta = 3.7e6 bars;
# na.omit() removes the NA entries left by diff().
# NOTE(review): volRet is reassigned further down from the delta = 100e6 bars
# and nothing in between reads it -- confirm whether this value is needed.
volRet <- na.omit(diff(log(MRes$Close)))

# Rebuild the bars with delta = 100e6, close to the average daily volume of
# 97,586,165 computed earlier, so each bar spans about one day of trading.
MRes <- getDailyVolumeBars(prices = dd$Price, volume = dd$Volume, delta = 100e6)
head(MRes)
##                         Open     High      Low    Close
## 2012-12-28 16:00:00 132.6409 132.8764 131.7555 131.7555
## 2012-12-31 12:21:36 131.7555 132.3772 131.7555 132.3018
## 2012-12-31 15:44:24 132.3018 133.8184 132.3018 133.6395
## 2013-01-02 10:48:00 133.6395 137.0495 133.6395 136.6538
## 2013-01-02 16:00:00 136.6538 137.6711 136.3900 137.6711
## 2013-01-03 14:42:00 137.6711 137.8125 137.2190 137.3320
# Candlestick chart of the first 50 volume bars, numbered in bar order.
# head() returns fewer rows when MRes is shorter, whereas `[1:50, ]` would pad
# the frame with all-NA rows; seq_len() also copes with a zero-row MRes.
dfAux <- head(data.frame(Bars = seq_len(nrow(MRes)), coredata(MRes)), 50)
fig <- dfAux %>% plot_ly(x = ~Bars, type = "candlestick",
                         open = ~Open, close = ~Close,
                         high = ~High, low = ~Low)
fig <- fig %>% layout(title = "Basic Candlestick Chart (Daily Volume)")
fig
# Log-returns between consecutive bar closes; na.omit() removes the NA entries
# left by diff(). These are the returns analysed in the plots that follow.
volRet <- na.omit(diff(log(MRes$Close)))

Volume bars

# 1) Return series, drawn against the bar number rather than clock time
plot(as.vector(volRet), type = "l", lwd = 1, col = "blue",
     main = "Log-returns sampled by volume",
     xlab = "Volume bar", ylab = "returns")

# 2) Density-scaled histogram overlaid with a normal curve that uses the
#    sample mean and standard deviation of the returns
h <- hist(volRet, breaks = 100, probability = TRUE, col = "lightgray",
          main = "Histogram of log-returns sampled by volume",
          xlab = "return")
xfit <- seq(from = min(volRet), to = max(volRet), length.out = 100)
yfit <- dnorm(xfit, mean = mean(volRet), sd = sd(volRet))
lines(x = xfit, y = yfit, lwd = 2, col = "blue")

# 3) Normal QQ plot with its reference line
qqnorm(volRet, main = "QQ plot of log-returns sampled by volume", col = "blue")
qqline(volRet, lwd = 2)

Time bars

# Download ^GSPC quotes for the period covered by the intraday data.
# `to` has to be a single time stamp: tail(x, 1) returns the last one, whereas
# tail(x, ) with the count left empty uses the default n = 6 and hands six
# values to getSymbols().
GSPC <- getSymbols("^GSPC", from = index(dd)[1], to = tail(index(dd), 1),
                   auto.assign = FALSE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
# Close-to-close log-returns of the downloaded series; the first element of
# the differenced series is dropped.
prices <- Cl(GSPC)
R_daily <- diff(log(prices))[-1]

# 1) Return series over calendar time
plot(R_daily, lwd = 1, col = "blue",
     main = "Returns sampled by time", ylab = "log-return")

# 2) Density-scaled histogram overlaid with a normal curve that uses the
#    sample mean and standard deviation of the returns
h <- hist(as.vector(R_daily), breaks = 100, probability = TRUE,
          col = "lightgray",
          main = "Histogram of log-returns sampled by time",
          xlab = "return")
xfit <- seq(from = min(R_daily), to = max(R_daily), length.out = 100)
yfit <- dnorm(xfit, mean = mean(R_daily), sd = sd(R_daily))
lines(x = xfit, y = yfit, lwd = 2, col = "blue")

# 3) Normal QQ plot with its reference line
qqnorm(R_daily, main = "QQ plot of log-returns sampled by time", col = "blue")
qqline(R_daily, lwd = 2)